home *** CD-ROM | disk | FTP | other *** search
Text File | 1995-05-20 | 7.7 KB | 316 lines | [TEXT/KEEN] |
- # $CompareFiles: display mismatched lines from two files.
- #Usage
- #Use “MFS selected files” input option after selecting
- #the two files you wish to compare
- #Leave “Show stdout” selected
- #Recommended: do not adjust the variables
- #dynamic_minimum or constant_minimum unless you suspect the
- #comparison is faulty. In this case, first try
- # dynamic_minimum=4
- #and if that doesn’t work, try
- # constant_minimum=2 or more, whatever you think is needed.
- #
- #To do:
- # -display mismatched lines in surrounding context
- # -side-by-side display of mismatched lines
- # -performance bogs down badly if mismatch > 50 lines
- #
-
- ####### Nuisance miscompares due to just different spaces and tabs?
- # To ignore just white-space differences, search for the
- # two instances of "##gsub..." below and enable them by
- # deleting the "##" just before the gsub's.
-
- #How it works:
- #This is a simple version of MPW’s “Compare” tool, implementation
- #here taken from the description in the MPW manual - note function
- #is not quite the same, but close. Lines are read from both files;
- #when a mismatch is encountered, lines are stacked up one at a time
- #from each file into the arrays s1[], s2[]. As each pair of lines is
- #added, the arrays s1 and s2 are checked for a group of lines that
- #matches up again. This group must be “G” lines long, where G is either
- #calculated from a formula or set to the constant value “constant_minimum”,
- #if that variable has been set with the setup dialog. A minimum value may
- #also be set if G is calculated, by setting the value of “dynamic_minimum”
- #in the setup dialog. The formula used to calculate G is
- # G = int((2*log(M)/c10)+2)
- #where M is the number of lines in s1[] (or s2[]) and
- # c10 = log(10), ie we’re doing the logs base 10.
- #Note the minimum calculated value of G is 2, and it increases very slowly.
- #
- #Variables:
- #Name use
- #----- ------------------------------------------------------
- #dynamic_minimum when G is calculated, this is used as minimum
- # value; default minimum is 2
- #constant_minimum used as the fixed value for G instead of calculating it
- # - minimum value is 2 if it is not zero.
- #(For default operation, leave both dynamic_minimum=0
- #and constant_minimum=0.)
- #
- #Illustrates:
- #-dispensing with the default input retrieval, doing it with getline
- #(in fact, reading from two files at once).
- #-how to get lines from file, or temporary buffer if lines have already
- #been read in, gory details isolated in “GetNextLine()”.
- #-top down structured approach - note the only pattern/action is the
- #BEGIN block, everything important is done with functions.
- #
- # User’s Manual references:
- # «hAWK User’s Manual» «F Running hAWK programs»
- # «hAWK User’s Manual» «L 5 Regular expressions»
- # «hAWK User’s Manual» «M 5 Built-in string and file functions»
- # «hAWK User’s Manual» «K 4 Built-in variables»
- # «hAWK User’s Manual» «K 8 Arrays»
- # «hAWK User’s Manual» «N User-defined functions»
- # «hAWK User’s Manual» «P 3 The getline function»
- # «hAWK User’s Manual» «O 3 Output into files»
- # «hAWK User’s Manual» «Q The hAWK function»
-
# Program entry point: everything is driven from BEGIN, no input pattern
# rules are used.  Expects exactly two file names on the command line.
BEGIN {
    if (ARGC != 3)
    {
        print "Two files at a time, please"
        exit
    }

    # log(10) is cached so GroupMatch() can take base-10 logs cheaply.
    c10 = log(10)

    # Keep the full names for reading and the last ":"-separated path
    # component (names1[z1], names2[z2]) for reporting.
    file1 = ARGV[1]
    z1 = split(file1, names1, ":")
    file2 = ARGV[2]
    z2 = split(file2, names2, ":")

    # A non-zero fixed group size below 2 is raised to 2.
    if (constant_minimum+0 != 0 && constant_minimum+0 < 2)
        constant_minimum = 2

    # Give up on a mismatch sooner when a fixed group size is in force.
    MMAX = (constant_minimum+0 > 0) ? 50 : 1000

    CompareFiles()
}
-
# CompareFiles: top-level driver.
# Reads the two files in lock step through GetNextLines(), calls Resynch()
# whenever the current pair of lines differs, and finally reports any lines
# left over in whichever file is longer.
# Reads globals:  file1, file2, names1/z1, names2/z2, x1, x2, EOF1,
#                 t1/t1Next/t1Max, t2/t2Next/t2Max
# Writes globals: FNR1, FNR2, anyMisMatch, x1, x2, t1Next, t2Next
# The extra parameter "announced" is a local variable; callers pass nothing.
function CompareFiles(    announced)
{
    print "Comparison of", file1, "(first file)"
    print "with", file2, "(second file):"
    print ""
    while (GetNextLines())
    {
        ++FNR1; ++FNR2;
        # Appending "" forces a string comparison.  Lines read with getline
        # that look like numbers would otherwise be compared numerically,
        # so "1.0" and "1" would wrongly count as identical lines.
        if ((x1 "") != (x2 ""))
        {
            anyMisMatch = 1
            Resynch()
        }
    }
    if (EOF1)
    {
        # First file ran out.  Anything left of the second file is extra:
        # first the lines still waiting in the push-back buffer t2[], then
        # whatever remains unread in the file itself.
        announced = 0
        while (1)
        {
            if (t2Next < t2Max)
            {
                x2 = t2[++t2Next]
                delete t2[t2Next]
            }
            else if ((getline x2 < file2) <= 0)
                break
            if (!announced)
            {
                anyMisMatch = 1
                print "----------Extra lines at end of second file at", names2[z2], FNR2
                announced = 1
            }
            print x2
        }
    }
    else #EOF2, and implied miscompare since EOF1 not true
    {
        # GetNextLines() had already fetched x1 when the second file ran
        # out, so x1 is the first extra line.  The rest comes from the
        # push-back buffer t1[] and then from the file.
        anyMisMatch = 1
        print "----------Extra lines at end of first file at", names1[z1], FNR1
        print x1
        while (1)
        {
            if (t1Next < t1Max)
            {
                x1 = t1[++t1Next]
                delete t1[t1Next]
            }
            else if ((getline x1 < file1) <= 0)
                break
            print x1
        }
    }
    if (anyMisMatch == 0)
        print "----------files match completely."
}
-
# GetNextLines: fetch the next line of each file into x1 and x2.
# For each file, lines waiting in the push-back buffer (t1[] / t2[],
# positions t1Next+1..t1Max / t2Next+1..t2Max) are used up first; only when
# the buffer is empty is the file itself read.
# Returns 1 when both x1 and x2 were filled.  Returns 0 as soon as one file
# cannot supply a line, setting EOF1 or EOF2 accordingly.  The first file is
# always tried first, so on EOF2 a fresh x1 has already been consumed.
function GetNextLines()
{
    # ---- first file ----
    if (t1Next >= t1Max)
    {
        # Buffer empty: read from the file.
        if ((getline x1 < file1) <= 0)
        {
            EOF1 = 1
            return 0
        }
        ##gsub(/[ \t]+/, "", x1);##enable this to ignore white space differences
    }
    else
    {
        t1Next++
        x1 = t1[t1Next]
        delete t1[t1Next]
    }

    # ---- second file ----
    if (t2Next >= t2Max)
    {
        # Buffer empty: read from the file.
        if ((getline x2 < file2) <= 0)
        {
            EOF2 = 1
            return 0
        }
        ##gsub(/[ \t]+/, "", x2);##enable this to ignore white space differences
    }
    else
    {
        t2Next++
        x2 = t2[t2Next]
        delete t2[t2Next]
    }

    return 1
}
-
# Resynch: called with a mismatching pair in x1/x2.
# Stacks further line pairs into s1[]/s2[] (M pairs so far) until
# GroupMatch() finds a run of lines that lines up again, then reports the
# mismatch through PrintMiscompare().
# Exits the program if either file ends inside the mismatched section or if
# more than MMAX pairs are stacked without finding a match.
# The extra parameter "matched" is a local variable; callers pass nothing.
function Resynch(    matched)
{
    s1[1] = x1
    s2[1] = x2
    M = 1
    do
    {
        if (!GetNextLines())
        {
            if (EOF1)
                print "!!!End of first file, ", names1[z1]
            else
            {
                print "!!!End of second file, ", names2[z2]
                # GetNextLines() reads the first file before it discovers
                # that the second one has ended, so x1 holds a line that
                # was consumed but never stacked.  Keep it so it is shown.
                s1[M + 1] = x1
            }
            print "encountered during mismatched section."
            # No group match was found, so s1First/s2First are stale (or
            # unset).  Point them just past the stacked lines so that
            # PrintMiscompare() prints everything collected so far.
            s1First = EOF1 ? (M + 1) : (M + 2)
            s2First = M + 1
            PrintMiscompare();
            exit
        }
        s1[++M] = x1
        s2[M] = x2
        matched = GroupMatch()
    } while (!matched && M <= MMAX);
    # Decide on the match result rather than on M: a match found on the very
    # last permitted attempt must still be reported, not thrown away.
    if (!matched)
    {
        print "!!!Mismatch over", MMAX, "lines, quitting early."
        exit
    }
    #Group match - print out miscompare
    PrintMiscompare();
}
-
# FindTailGroup: helper for GroupMatch().
# Looks for the last G lines of tail[] (positions GMinus..M) as a
# consecutive run inside body[], trying start positions 1..GMinus in order.
# Returns the first start position in body[] where all G lines agree,
# or 0 if there is none.  Reads globals G and GMinus.
function FindTailGroup(tail, body,    start, k)
{
    for (start = 1; start <= GMinus; ++start)
    {
        for (k = 0; k < G; ++k)
        {
            # Appending "" forces a string comparison; lines that look like
            # numbers ("1.0" vs "1") must not be treated as equal.
            if ((tail[GMinus + k] "") != (body[start + k] ""))
                break
        }
        if (k == G)
            return start
    }
    return 0
}

# GroupMatch: test whether the stacked lines s1[1..M] and s2[1..M] line up
# again somewhere.
# G is the required length of the matching group: constant_minimum if that
# is set, otherwise int(2*log10(M) + 2), raised to dynamic_minimum if that
# is larger.
# On success returns 1 and sets s1First/s2First to the positions in s1[] and
# s2[] where the matching group starts; otherwise returns 0.
# Writes globals G and GMinus (G is used afterwards by PrintMiscompare()).
# The extra parameter "pos" is a local variable; callers pass nothing.
function GroupMatch(    pos)
{
    if (constant_minimum+0 > 0)
        G = int(constant_minimum)
    else
    {
        G = int((2*log(M)/c10)+2)
        if (G < dynamic_minimum+0)
            G = int(dynamic_minimum)
    }
    # First position at which a group of G lines can still end at line M.
    # When M < G this is below 1 and neither search below tries anything.
    GMinus = M-G+1

    #Attempt to match last G lines of s1 against s2
    #-start from beginning of s2
    pos = FindTailGroup(s1, s2)
    if (pos > 0)
    {
        s1First = GMinus
        s2First = pos
        return 1
    }

    #Attempt to match last G lines of s2 against s1
    #-start from beginning of s1
    pos = FindTailGroup(s2, s1)
    if (pos > 0)
    {
        s2First = GMinus
        s1First = pos
        return 1
    }
    return 0
}
-
#Lines added to one file are equivalent to lines deleted from the other file.
#If the match doesn't start at the beginning of one buffer, then print the
#mismatching lines from the beginnings of both buffers.
# PrintMiscompare: report the mismatched lines collected in s1[]/s2[].
# s1First/s2First mark where the re-matching group starts in each stack;
# every line before those positions is printed as mismatched.
# Afterwards the lines that were read beyond the matching group are pushed
# back into t1[]/t2[] so GetNextLines() returns them again, the line
# counters FNR1/FNR2 are advanced, and both stacks are emptied.
# Reads globals:  s1, s2, s1First, s2First, G, M, names1/z1, names2/z2
# Writes globals: s1First, s2First, FNR1, FNR2, t1/t1Next/t1Max,
#                 t2/t2Next/t2Max, s1, s2
# The extra parameters "i" and "n" are local variables; callers pass nothing.
function PrintMiscompare(    i, n)
{
    print "------------------------------------------------------------------------"
    if (s1First == 1)
    {
        print "----------Extra lines in second file at", names2[z2], FNR2, " (vs", names1[z1], FNR1, ")"
        for (i = 1; i < s2First; ++i)
            print s2[i]

    }
    else if (s2First == 1)
    {
        print "----------Extra lines in first file at", names1[z1], FNR1, " (vs", names2[z2], FNR2, ")"
        for (i = 1; i < s1First; ++i)
            print s1[i]
    }
    else
    {
        print "----------Mismatching lines in both files:"
        print "----------from first file at", names1[z1], FNR1
        for (i = 1; i < s1First; ++i)
            print s1[i]
        print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"
        print "----------from second file at", names2[z2], FNR2
        for (i = 1; i < s2First; ++i)
            print s2[i]
    }
    print "------------------------------------------------------------------------"
    print ""

    # Step over the matching group itself.
    s1First += G
    s2First += G
    FNR1 += s1First - 2
    FNR2 += s2First - 2

    # Push back the lines read beyond the matching group.  They were read
    # BEFORE anything still pending in the buffer, so they must be placed in
    # front of the pending entries (positions tNext+1..tMax), not appended
    # after them; appending would hand lines back out of file order.
    # Moving the read cursor backwards does this; awk array subscripts may
    # be zero or negative, so no renumbering is needed.
    t1Max += 0      # make sure the bounds are numeric before comparing
    t2Max += 0
    if (s1First <= M)
    {
        n = M - s1First + 1
        t1Next -= n
        for (i = 1; i <= n; ++i)
            t1[t1Next + i] = s1[s1First + i - 1]
    }
    if (s2First <= M)
    {
        n = M - s2First + 1
        t2Next -= n
        for (i = 1; i <= n; ++i)
            t2[t2Next + i] = s2[s2First + i - 1]
    }

    # Empty both stacks for the next mismatch.
    for (i in s1)
        delete s1[i]
    for (i in s2)
        delete s2[i]
}
-